library(tidyverse)
library(plotly)
library(sf)
library(tigris)
library(leaflet)
quarters <- 1:4
years <- c(2017, 2018, 2019)
types <- c("Electric", "Gas")
pge_elec <- NULL
pge_gas <- NULL
for(type in types){
#2017-2019
for(year in years){
for(quarter in quarters){
filename <- paste0(
"PGE Data/PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
temp = read_csv(filename)
if(type == "Electric"){
pge_elec <- rbind(pge_elec,temp)
} else if(type == "Gas"){
pge_gas <- rbind(pge_gas,temp)
}
}
}
#2020
for(quarter in c(1,2)){
filename <- paste0(
"PGE Data/PGE_2020_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
temp = read_csv(filename)
if(type == "Electric"){
pge_elec <- rbind(pge_elec,temp)
} else if(type == "Gas"){
pge_gas <- rbind(pge_gas,temp)
}
}
saveRDS(pge_elec, "pge_elec.rds")
saveRDS(pge_gas, "pge_gas.rds")
}
pge_elec_processed <-
pge_elec %>%
filter(CUSTOMERCLASS %in% c("Elec- Residential", "Elec- Commercial")) %>%
select(!c(COMBINED, AVERAGEKWH)) %>%
group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T),
TOTALCUSTOMERS = sum(TOTALCUSTOMERS, na.rm = T),
) %>%
mutate(
TOTALKBTU = TOTALKWH*3412.14,
AVERAGEKBTU = TOTALKBTU/TOTALCUSTOMERS
) %>%
select(!c(TOTALKWH))
pge_gas_processed <-
pge_gas %>%
filter(CUSTOMERCLASS %in% c("Gas- Residential", "Gas- Commercial")) %>%
select(!c(COMBINED, AVERAGETHM)) %>%
group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
summarize(
TOTALTHM = sum(TOTALTHM, na.rm = T),
TOTALCUSTOMERS = sum(TOTALCUSTOMERS, na.rm = T),
) %>%
mutate(
TOTALKBTU = TOTALTHM*99976.1,
AVERAGEKBTU = TOTALKBTU/TOTALCUSTOMERS
) %>%
select(!c(TOTALTHM))
pge_merge <-
rbind(pge_gas_processed, pge_elec_processed) %>%
mutate(YEAR_MONTH = as.Date(paste0(YEAR, "-", MONTH, "-", 01)))
# convert year-month to month (2018/01 -> 13)
# mutate(
# YEAR_MONTH =
# ifelse(YEAR == 2017, MONTH,
# ifelse(YEAR == 2018, MONTH + 12,
# ifelse(YEAR == 2019, MONTH + 24,
# MONTH + 36)))
# )
# mutate(YEAR_MONTH = paste0(YEAR, "-", MONTH))
# sort
# pge_merge <- pge_merge[order(pge_merge$YEAR),]
pge_commercial <-
pge_merge %>%
filter(CUSTOMERCLASS %in% c("Elec- Commercial", "Gas- Commercial"))
pge_residential <-
pge_merge %>%
filter(CUSTOMERCLASS %in% c("Elec- Residential", "Gas- Residential"))
saveRDS(pge_merge, "pge_merge.rds")
saveRDS(pge_commercial, "pge_commercial.rds")
saveRDS(pge_residential, "pge_residential.rds")
pge_merge_chart <-
pge_merge %>%
ggplot() +
geom_bar(
aes(
x = YEAR_MONTH %>% factor(),
y = TOTALKBTU,
fill = CUSTOMERCLASS
),
stat = "identity",
position = "stack"
) +
labs(
x = "Month",
y = "kBTU",
title = "PG&E Territory Monthly Electricity and Gas Usage, 2017-Current",
fill = "Energy Type"
) + theme(text = element_text(size=10),
axis.text.x = element_text(angle=90, hjust=1)
)
pge_commercial_chart <-
pge_commercial %>%
ggplot() +
geom_bar(
aes(
x = YEAR_MONTH %>% factor(),
y = TOTALKBTU,
fill = CUSTOMERCLASS
),
stat = "identity",
position = "stack"
) +
labs(
x = "Month",
y = "kBTU",
title = "PG&E Territory Monthly Electricity and Gas Usage, 2017-Current",
fill = "Energy Type"
) + theme(text = element_text(size=10),
axis.text.x = element_text(angle=90, hjust=1)
)
pge_residential_chart <-
pge_residential %>%
ggplot() +
geom_bar(
aes(
x = YEAR_MONTH %>% factor(),
y = TOTALKBTU,
fill = CUSTOMERCLASS
),
stat = "identity",
position = "stack"
) +
labs(
x = "Month",
y = "kBTU",
title = "PG&E Territory Monthly Electricity and Gas Usage, 2017-Current",
fill = "Energy Type"
) + theme(text = element_text(size=10),
axis.text.x = element_text(angle=90, hjust=1)
)
pge_merge_chart %>%
ggplotly() %>%
layout(
xaxis = list(fixedrange = T),
yaxis = list(fixedrange = T)
) %>%
config(displayModeBar = F)
pge_commercial_chart %>%
ggplotly() %>%
layout(
xaxis = list(fixedrange = T),
yaxis = list(fixedrange = T)
) %>%
config(displayModeBar = F)
pge_residential_chart %>%
ggplotly() %>%
layout(
xaxis = list(fixedrange = T),
yaxis = list(fixedrange = T)
) %>%
config(displayModeBar = F)
Comments: It looks like both residential and commercial electricity usage stayed pretty constant in the last 6 months while gas usage has decreased significantly. However, it’s hard to tell whether it’s due to COVID or simply the seasons, since it follows a similar trend to previous years. If anything, energy usage is higher than at the same months in previous years.
elec_before_covid <-
pge_elec %>%
filter(
YEAR == 2019 &
MONTH == 12 &
CUSTOMERCLASS %in% c("Elec- Residential", "Elec- Commercial")
) %>%
select(!c(COMBINED, MONTH, YEAR, TOTALCUSTOMERS, AVERAGEKWH)) %>%
group_by(ZIPCODE) %>%
summarize(TOTALKWH = sum(TOTALKWH, na.rm = T))
elec_after_covid <-
pge_elec %>%
filter(
YEAR == 2020 &
MONTH == 2 &
CUSTOMERCLASS %in% c("Elec- Residential", "Elec- Commercial")
) %>%
select(!c(COMBINED, MONTH, YEAR, TOTALCUSTOMERS, AVERAGEKWH)) %>%
group_by(ZIPCODE) %>%
summarize(TOTALKWH = sum(TOTALKWH, na.rm = T))
elec_combined <-
elec_before_covid %>%
rename(KWH_BEFORE = TOTALKWH) %>%
mutate(
ZIPCODE = ZIPCODE %>% as.character(),
KWH_AFTER = elec_after_covid$TOTALKWH,
CHANGE_KWH = elec_after_covid$TOTALKWH - elec_before_covid$TOTALKWH
)
projection <- 4326
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties <-
counties("CA", cb = T, progress_bar = F) %>%
filter(NAME %in% bay_county_names)
usa_zips <-
zctas(cb = T, progress_bar = F)
bay_zips <-
usa_zips %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(usa_zips %>% select(GEOID10)) %>%
st_as_sf()
elec_combined_geo <-
elec_combined %>%
left_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(projection)
## Make vector of colors for values smaller than 0 (20 colors)
rc1 <- colorRampPalette(colors = c("red", "white"), space = "Lab")(400)
## Make vector of colors for values larger than 0 (180 colors)
rc2 <- colorRampPalette(colors = c("white", "blue"), space = "Lab")(1000)
## Combine the two color palettes
rampcols <- c(rc1, rc2)
mypal <- colorNumeric(palette = rampcols, domain = elec_combined_geo$CHANGE_KWH)
leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_combined_geo,
fillColor = ~mypal(CHANGE_KWH),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE_KWH),
" change in kWh in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_combined_geo,
pal = mypal,
values = ~CHANGE_KWH,
title = "Change in kWh<br>between<br>12-2019 and 02-2020"
)
Comments: It was difficult to get the legend right, since the range of increase/decrease in electricity usage is so wide. Generally, most zip codes decreased electricity usage between december 2019 and february 2020. 94544 decreased the most, while there was a spike right next to it in 94545.
Assumptions made:
I made the assumption that covid started in january 2020. I used december 2019 and february 2020 to frame that month, as to highlight before and after the start of the pandemic. However, a better time frame would probably be when students left campus and places started shutting down, so like between february 2020 and april 2020.
I also only included residential and commercial energy usage by PG&E, not other areas like agricultural use.